package com.hdaccp.ch10

import org.apache.spark.sql.{SaveMode, SparkSession}

object MyLogCleanJob1 {

  // Historical hard-coded paths, kept as defaults for backward compatibility
  // when the job is launched with no CLI arguments.
  private val DefaultInputPath  = "F:\\accp教学\\sparkresources\\log4"
  private val DefaultOutputPath = "F:\\accp教学\\sparkresources\\cleanlog1"

  /**
   * Log-cleaning batch job: reads raw log lines, parses each line into a Row
   * via `MyLogConverterUtil.parseLog`, and writes the result as a single
   * Parquet output.
   *
   * @param args optional overrides — args(0) = input path, args(1) = output
   *             path; missing arguments fall back to the original defaults.
   */
  def main(args: Array[String]): Unit = {
    val inputPath  = if (args.length > 0) args(0) else DefaultInputPath
    val outputPath = if (args.length > 1) args(1) else DefaultOutputPath

    val spark = SparkSession.builder()
      .master("local[2]") // local mode, 2 threads; override via spark-submit for cluster runs
      .appName("MyLogCleanJob1") // descriptive name instead of "A" for the Spark UI/history server
      .getOrCreate()

    // Ensure the session is always released, even if parsing or writing fails.
    try {
      val rdd = spark.sparkContext.textFile(inputPath)

      // Parse each raw line into a Row conforming to MyLogConverterUtil.struct.
      val df = spark.createDataFrame(
        rdd.map(line => MyLogConverterUtil.parseLog(line)),
        MyLogConverterUtil.struct
      )

      // coalesce(1): collapse to a single output file (fine for small cleaned logs;
      // drop it if the dataset grows large enough that one partition bottlenecks).
      df.coalesce(1)
        .write
        .format("parquet")
        .mode(SaveMode.Overwrite)
        .save(outputPath)
    } finally {
      spark.stop()
    }
  }
}
